# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)
library(plm)
library(lfe)
library(dplyr)
library(reshape)

rm(list = ls())

# sort styles by lagged exponential sum of ratings
data = readRDS('input_data/morningstar_style_data.RDS')
data = data[yyyymm >= 199102, list(yyyymm, category, ret, expSum_1)]
data = data[order(yyyymm, expSum_1)]
for (this in unique(data[, yyyymm])){
  data[yyyymm == this, bin := 1:9]
}
rm(this)
data[, period := ifelse(yyyymm > 200206, '2_after 2002','1_before 2002')]

# compute CAPM alpha
tmp = readRDS('input_data/return_factors.RDS')
data = merge(data, tmp, by = c('yyyymm')); rm(tmp)
data[, ret_rf := ret - rf] # subtract risk-free rate
output_capm_alpha = data.table()
for (thisPeriod in unique(data[, period])){
  for (i in 1:9){
    fit = summary(felm(ret_rf ~ mktRf, data[(period == thisPeriod) & (bin == i)]))
    output_capm_alpha = rbind(output_capm_alpha, data.table(period = thisPeriod, bin = i, 
                                            alpha = 100*fit$coef[1,1], se = 100*fit$coef[1,2]))
  }  
}
rm(i, thisPeriod, fit)

# demean returns by month and summarize
data = merge(data, data[, list(m_ret = mean(ret)), yyyymm])
data[, ret := ret - m_ret]
output_ret = data[, list(ret = 100*mean(ret), se_ret = 100*sd(ret)/sqrt(length(yyyymm))), list(period, bin)]

# output: Panel A, estimates
cast(output_ret[, list(period, bin, round(ret,2))], period ~ bin)

# output: Panel A, standard errors
cast(output_ret[, list(period, bin, round(se_ret,2))], period ~ bin)

# output: Panel B, estimates
cast(output_capm_alpha[, list(period, bin, round(alpha,2))], period ~ bin)

# output: Panel B, standard errors
cast(output_capm_alpha[, list(period, bin, round(se,2))], period ~ bin)

